****************************************************************
**** Find groups that change over 3 years to match botox pre-filing
* Switch to the synthetic 100pct botox working directory and load the cleaned
* op_carrier extract into memory.
cd "/disk/agedisk3/medicare.work/poterba-DUA52260/jetson-dua52260/botox/synth/100pct/"
use "op_carrier_100pct_clean_botox.dta", clear

* Report how many distinct HCPCS codes appear in the file.
* The reduction to one row per code happens on a preserved copy, so the full
* dataset (including its sort order) is back in memory after restore.
preserve
keep hcpcs_cd
bysort hcpcs_cd: keep if _n == 1
count
restore

*** Remove every group whose smallest pmt_amt is exactly zero
* The per-group minimum is attached to each row and then used as the filter.
by g, sort: egen min_pmt = min(pmt_amt)
keep if min_pmt != 0

*** Remove groups with a jump between adjacent periods
* abs_ratio is the absolute value of (previous pmt_amt / current pmt_amt).
* A group is kept only when that ratio never exceeds 2 and never falls below .5.
* Groups with no computable ratio are removed as well: their maximum is missing,
* and missing compares as larger than any number in Stata.
* NOTE(review): L1. requires the data to be tsset already. No tsset appears before
* this point, so the setting presumably travels with the loaded .dta -- confirm.
gen abs_ratio = abs(L1.pmt_amt/pmt_amt)
by g, sort: egen max_ratio = max(abs_ratio)
by g, sort: egen min_ratio = min(abs_ratio)
keep if max_ratio <= 2 & min_ratio >= .5

** Filing date: 6/5/07 -- print the monthly-date value of the filing month
display mofd(mdy(6, 5, 2007))
*** Botox exhibits growth 2002 to 2007 pre-filing
*twoway scatter pmt_amt dt if group == 99999, xline(569)

*** Derive the calendar year from the monthly date dt
generate year = yofd(dofm(dt))

**** Yearly series: one row per group-year holding total pmt_amt and the last
**** hcpcs_cd seen within that group-year (all other variables are discarded)
collapse (sum) pmt_amt (last) hcpcs_cd, by(g year)
* twoway scatter pmt_amt year if g == 99999, xline(2007)

*** Declare the group-by-year panel so lag operators work on yearly data
tsset g year


* *** Find growth groups over 3 years
* Benchmark values live in locals so each number is written exactly once.
*   target_growth : three-year growth multiple to match
*   base_level    : payment level three years earlier to match
local target_growth 1.596
local base_level    20357492

* Stored as double: yearly totals near the benchmark level are above 2^24
* (about 16.8 million), where float can no longer represent every whole dollar.
gen double L3pmt  = L3.pmt_amt
gen double growth = pmt_amt/L3pmt

* *** Allergan defendant had 1.596x growth over three years prior to 2006
*** From around twenty million to 39 million
* NOTE(review): 20,357,492 x 1.596 is roughly 32.5 million, not 39 million --
* confirm which figure in the comment above is intended.
* Find other growth groups in the same range, pattern
* Rows without a three-year lag have no growth value and are removed.
drop if mi(growth)
drop if year > 2011
* The sort fixes which row is first for each g when duplicates are dropped below.
sort growth
* Growth: (25% bandwidth around the target multiple)
keep if growth > `target_growth'*.75 & growth < `target_growth'*1.25
* Base level: (10% bandwidth around the benchmark level)
keep if L3pmt > `base_level'*.9 & L3pmt < `base_level'*1.1
* Giving a wider bandwidth to growth because the patterns aren't quite linear
* month to month; the yearly series growth is used to decide.
*** Make unique: keep the first remaining row for each g
duplicates drop g, force

*** Recode groups
*** Because names too long breaks reshape
* encode maps each distinct hcpcs_cd string to an integer code in variable group.
encode hcpcs_cd, gen(group)

* Remove the treated botox series by name instead of overwriting its code with a
* sentinel value: a hard-coded sentinel (previously 999) would silently merge
* with, and then delete, a real code if encode ever assigned that same integer.
* Dropping after encode leaves the integer codes of all other series unchanged.
drop if hcpcs_cd == "botox_treated"

*** Make uniques: one row per encoded group
duplicates drop group, force
keep group g hcpcs_cd
count
save growthgroups.dta, replace
